import numpy as np

def get_stats_without_outliers(data):
    """
    Summarise each dataset after dropping its IQR outliers.

    For every dataset the 25th and 75th percentiles (Q1, Q3) are computed,
    values outside ``[Q1 - 1.5 * IQR, Q3 + 1.5 * IQR]`` are discarded, and the
    summary statistics are taken over the values that remain.

    Args:
        data: A list of lists, where each sublist represents a dataset
            (a 1D sequence of numbers).

    Returns:
        A NumPy array with one row per dataset and seven columns, in order:
        original size, size after removing outliers, minimum,
        25th percentile, 50th percentile (median), 75th percentile, maximum.

    Raises:
        ValueError: If no value of a dataset lies inside its bounds (for
            example when the dataset contains NaN, which makes both bounds
            NaN); raised by ``np.min`` on the empty remainder.
    """
    stats = []
    for d in data:
        values = np.asarray(d)

        # Q1 and Q3 come from a single percentile call per dataset.
        q1, q3 = np.percentile(values, [25, 75])
        iqr = q3 - q1
        lower_bound = q1 - 1.5 * iqr
        upper_bound = q3 + 1.5 * iqr

        # Bounds are inclusive: values equal to a bound are kept.
        kept = values[(values >= lower_bound) & (values <= upper_bound)]

        # np.min is evaluated first so an empty remainder fails with ValueError.
        minimum = np.min(kept)
        p25, p50, p75 = np.percentile(kept, [25, 50, 75])
        maximum = np.max(kept)

        stats.append([values.size, kept.size, minimum, p25, p50, p75, maximum])

    return np.array(stats)

###############################################################################################################################################
def get_stats_all_data(data):
    """
    Compute a five-number summary for every dataset, with no outlier removal.

    Args:
        data: A list of lists, where each sublist represents a dataset.

    Returns:
        A NumPy array with one row per dataset holding, in order, the minimum,
        25th percentile, 50th percentile (median), 75th percentile and maximum.
    """
    # Each row: minimum, the three quartiles, then maximum of the raw values.
    rows = [
        [np.min(d), *np.percentile(d, [25, 50, 75]), np.max(d)]
        for d in data
    ]
    return np.array(rows)

############################################################################################################################################
"""

def list_remove_outliers_IQR(data):


  # Removes outliers from a list of numerical arrays using the Interquartile Range (IQR) method.

 # Args:
 #   data: A list of lists, where each sublist represents a dataset.

 # Returns:
 #   A list of lists, where each sublist represents a dataset after removing outliers.
 
 

 # Calculate IQR (Interquartile Range)
 iqr = []
 for d in data:
     q1 = np.percentile(d, 25)
     q3 = np.percentile(d, 75)
     iqr.append(q3 - q1)

 # Define outlier bounds
 lower_bound = []
 upper_bound = []
 for d, iq in zip(data, iqr):
     q1 = np.percentile(d, 25)
     q3 = np.percentile(d, 75)
     lower_bound.append(q1 - 1.5 * iq)
     upper_bound.append(q3 + 1.5 * iq)

 # Remove outliers
 data_wo_outliers = []
 for d, lb, ub in zip(data, lower_bound, upper_bound):
     filtered_data = [x for x in d if (x >= lb and x <= ub)]
     data_wo_outliers.append(filtered_data)

 
 return np.array(data_wo_outliers)

"""

################################################################################################################################################################
def remove_outliers_IQR(data):
    """
    Remove outliers from numerical data using the Interquartile Range (IQR) method.

    Values outside ``[Q1 - 1.5 * IQR, Q3 + 1.5 * IQR]`` are dropped, where Q1
    and Q3 are the 25th and 75th percentiles of ``data``. Both bounds are
    inclusive.

    Args:
        data: A 1D NumPy array containing the numerical data. A plain list or
            tuple of numbers is also accepted and is converted to an array.

    Returns:
        The values of ``data`` that lie inside the bounds, in their original
        order (a NumPy array for array, list or tuple input).
    """
    # A list or tuple cannot be indexed with a boolean mask, so convert it
    # first; any other input type is left untouched.
    if isinstance(data, (list, tuple)):
        data = np.asarray(data)

    q1, q3 = np.percentile(data, [25, 75])
    iqr = q3 - q1
    lower_bound = q1 - 1.5 * iqr
    upper_bound = q3 + 1.5 * iqr
    return data[(data >= lower_bound) & (data <= upper_bound)]
    
